cwurData <- read.csv("cwurData.csv")
educationExpenditure <- read.csv("education_expenditure_supplementary_data.csv")
educationalAttainment <- read.csv("educational_attainment_supplementary_data.csv")
schoolCountry <- read.csv("school_and_country_table.csv")
shanghaiData <- read.csv("shanghaiData.csv")
timesData <- read.csv("timesData.csv")
knitr::kable(head(cwurData,10), caption = "Central World University Rankings information (first 10 rows)")
| world_rank | institution | country | national_rank | quality_of_education | alumni_employment | quality_of_faculty | publications | influence | citations | broad_impact | patents | score | year |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Harvard University | USA | 1 | 7 | 9 | 1 | 1 | 1 | 1 | NA | 5 | 100.00 | 2012 |
| 2 | Massachusetts Institute of Technology | USA | 2 | 9 | 17 | 3 | 12 | 4 | 4 | NA | 1 | 91.67 | 2012 |
| 3 | Stanford University | USA | 3 | 17 | 11 | 5 | 4 | 2 | 2 | NA | 15 | 89.50 | 2012 |
| 4 | University of Cambridge | United Kingdom | 1 | 10 | 24 | 4 | 16 | 16 | 11 | NA | 50 | 86.17 | 2012 |
| 5 | California Institute of Technology | USA | 4 | 2 | 29 | 7 | 37 | 22 | 22 | NA | 18 | 85.21 | 2012 |
| 6 | Princeton University | USA | 5 | 8 | 14 | 2 | 53 | 33 | 26 | NA | 101 | 82.50 | 2012 |
| 7 | University of Oxford | United Kingdom | 2 | 13 | 28 | 9 | 15 | 13 | 19 | NA | 26 | 82.34 | 2012 |
| 8 | Yale University | USA | 6 | 14 | 31 | 12 | 14 | 6 | 15 | NA | 66 | 79.14 | 2012 |
| 9 | Columbia University | USA | 7 | 23 | 21 | 10 | 13 | 12 | 14 | NA | 5 | 78.86 | 2012 |
| 10 | University of California, Berkeley | USA | 8 | 16 | 52 | 6 | 6 | 5 | 3 | NA | 16 | 78.55 | 2012 |
knitr::kable(head(shanghaiData,10),caption="Shanghai Ranking information (first 10 rows)")
| world_rank | university_name | national_rank | total_score | alumni | award | hici | ns | pub | pcp | year |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Harvard University | 1 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 100.0 | 72.4 | 2005 |
| 2 | University of Cambridge | 1 | 73.6 | 99.8 | 93.4 | 53.3 | 56.6 | 70.9 | 66.9 | 2005 |
| 3 | Stanford University | 2 | 73.4 | 41.1 | 72.2 | 88.5 | 70.9 | 72.3 | 65.0 | 2005 |
| 4 | University of California, Berkeley | 3 | 72.8 | 71.8 | 76.0 | 69.4 | 73.9 | 72.2 | 52.7 | 2005 |
| 5 | Massachusetts Institute of Technology (MIT) | 4 | 70.1 | 74.0 | 80.6 | 66.7 | 65.8 | 64.3 | 53.0 | 2005 |
| 6 | California Institute of Technology | 5 | 67.1 | 59.2 | 68.6 | 59.8 | 65.8 | 52.5 | 100.0 | 2005 |
| 7 | Columbia University | 6 | 62.3 | 79.4 | 60.6 | 56.1 | 54.2 | 69.5 | 45.4 | 2005 |
| 8 | Princeton University | 7 | 60.9 | 63.4 | 76.8 | 60.9 | 48.7 | 48.5 | 59.1 | 2005 |
| 9 | University of Chicago | 8 | 60.1 | 75.6 | 81.9 | 50.3 | 44.7 | 56.4 | 42.2 | 2005 |
| 10 | University of Oxford | 2 | 59.7 | 64.3 | 59.1 | 48.4 | 55.6 | 68.4 | 53.2 | 2005 |
knitr::kable(head(educationalAttainment,10),caption="Education attainment information (first 10 rows)")
| country_name | series_name | X1985 | X1986 | X1987 | X1990 | X1991 | X1992 | X1993 | X1995 | X1996 | X1997 | X1998 | X1999 | X2000 | X2001 | X2002 | X2003 | X2004 | X2005 | X2006 | X2007 | X2008 | X2009 | X2010 | X2011 | X2012 | X2013 | X2015 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Afghanistan | Barro-Lee: Average years of primary schooling, age 15+, female | 0.33 | NA | NA | 0.44 | NA | NA | NA | 0.57 | NA | NA | NA | NA | 0.75 | NA | NA | NA | NA | 0.86 | NA | NA | NA | NA | 1.27 | NA | NA | NA | NA |
| Afghanistan | Barro-Lee: Average years of primary schooling, age 15+, total | 1.03 | NA | NA | 1.26 | NA | NA | NA | 1.54 | NA | NA | NA | NA | 2.01 | NA | NA | NA | NA | 2.18 | NA | NA | NA | NA | 2.64 | NA | NA | NA | NA |
| Afghanistan | Barro-Lee: Average years of primary schooling, age 15-19, female | 0.83 | NA | NA | 0.95 | NA | NA | NA | 1.26 | NA | NA | NA | NA | 1.92 | NA | NA | NA | NA | 1.01 | NA | NA | NA | NA | 2.45 | NA | NA | NA | NA |
| Afghanistan | Barro-Lee: Average years of primary schooling, age 15-19, total | 2.34 | NA | NA | 2.22 | NA | NA | NA | 2.37 | NA | NA | NA | NA | 3.83 | NA | NA | NA | NA | 2.26 | NA | NA | NA | NA | 3.55 | NA | NA | NA | NA |
| Afghanistan | Barro-Lee: Average years of primary schooling, age 20-24, female | 0.54 | NA | NA | 0.92 | NA | NA | NA | 0.94 | NA | NA | NA | NA | 1.26 | NA | NA | NA | NA | 2.00 | NA | NA | NA | NA | 1.29 | NA | NA | NA | NA |
| Afghanistan | Barro-Lee: Average years of primary schooling, age 20-24, total | 1.52 | NA | NA | 2.51 | NA | NA | NA | 2.27 | NA | NA | NA | NA | 2.48 | NA | NA | NA | NA | 3.93 | NA | NA | NA | NA | 2.64 | NA | NA | NA | NA |
| Afghanistan | Barro-Lee: Average years of primary schooling, age 25+, female | 0.17 | NA | NA | 0.25 | NA | NA | NA | 0.37 | NA | NA | NA | NA | 0.48 | NA | NA | NA | NA | 0.63 | NA | NA | NA | NA | 0.81 | NA | NA | NA | NA |
| Afghanistan | Barro-Lee: Average years of primary schooling, age 25+, total | 0.66 | NA | NA | 0.85 | NA | NA | NA | 1.14 | NA | NA | NA | NA | 1.38 | NA | NA | NA | NA | 1.69 | NA | NA | NA | NA | 2.19 | NA | NA | NA | NA |
| Afghanistan | Barro-Lee: Average years of primary schooling, age 25-29, female | 0.44 | NA | NA | 0.54 | NA | NA | NA | 0.92 | NA | NA | NA | NA | 0.94 | NA | NA | NA | NA | 1.26 | NA | NA | NA | NA | 1.92 | NA | NA | NA | NA |
| Afghanistan | Barro-Lee: Average years of primary schooling, age 25-29, total | 1.28 | NA | NA | 1.52 | NA | NA | NA | 2.51 | NA | NA | NA | NA | 2.27 | NA | NA | NA | NA | 2.48 | NA | NA | NA | NA | 3.93 | NA | NA | NA | NA |
knitr::kable(head(educationExpenditure,10),caption="Education expenditure information (first 10 rows)")
| country | institute_type | direct_expenditure_type | X1995 | X2000 | X2005 | X2009 | X2010 | X2011 |
|---|---|---|---|---|---|---|---|---|
| OECD Average | All Institutions | Public | 4.9 | 4.9 | 5.0 | 5.4 | 5.4 | 5.3 |
| Australia | All Institutions | Public | 4.5 | 4.6 | 4.3 | 4.5 | 4.6 | 4.3 |
| Austria | All Institutions | Public | 5.3 | 5.4 | 5.2 | 5.7 | 5.6 | 5.5 |
| Belgium | All Institutions | Public | 5.0 | 5.1 | 5.8 | 6.4 | 6.4 | 6.4 |
| Canada | All Institutions | Public | 5.8 | 5.2 | 4.8 | 5.0 | 5.2 | NA |
| Chile | All Institutions | Public | NA | 4.2 | 3.3 | 4.1 | 4.3 | 3.9 |
| Czech Republic | All Institutions | Public | 4.8 | 4.2 | 4.1 | 4.2 | 4.1 | 4.4 |
| Denmark | All Institutions | Public | 6.5 | 6.4 | 6.8 | 7.5 | 7.6 | 7.5 |
| Estonia | All Institutions | Public | NA | NA | 4.7 | 5.9 | 5.6 | 5.2 |
| Finland | All Institutions | Public | 6.6 | 5.5 | 5.9 | 6.3 | 6.4 | 6.3 |
knitr::kable(head(timesData,10),caption="Times Higher Education World University Rankings data information (first 10 rows)")
| world_rank | university_name | country | teaching | international | research | citations | income | total_score | num_students | student_staff_ratio | international_students | female_male_ratio | year |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Harvard University | United States of America | 99.7 | 72.4 | 98.7 | 98.8 | 34.5 | 96.1 | 20,152 | 8.9 | 25% | 2011 | |
| 2 | California Institute of Technology | United States of America | 97.7 | 54.6 | 98.0 | 99.9 | 83.7 | 96.0 | 2,243 | 6.9 | 27% | 33 : 67 | 2011 |
| 3 | Massachusetts Institute of Technology | United States of America | 97.8 | 82.3 | 91.4 | 99.9 | 87.5 | 95.6 | 11,074 | 9.0 | 33% | 37 : 63 | 2011 |
| 4 | Stanford University | United States of America | 98.3 | 29.5 | 98.1 | 99.2 | 64.3 | 94.3 | 15,596 | 7.8 | 22% | 42 : 58 | 2011 |
| 5 | Princeton University | United States of America | 90.9 | 70.3 | 95.4 | 99.9 | - | 94.2 | 7,929 | 8.4 | 27% | 45 : 55 | 2011 |
| 6 | University of Cambridge | United Kingdom | 90.5 | 77.7 | 94.1 | 94.0 | 57.0 | 91.2 | 18,812 | 11.8 | 34% | 46 : 54 | 2011 |
| 6 | University of Oxford | United Kingdom | 88.2 | 77.2 | 93.9 | 95.1 | 73.5 | 91.2 | 19,919 | 11.6 | 34% | 46 : 54 | 2011 |
| 8 | University of California, Berkeley | United States of America | 84.2 | 39.6 | 99.3 | 97.8 | - | 91.1 | 36,186 | 16.4 | 15% | 50 : 50 | 2011 |
| 9 | Imperial College London | United Kingdom | 89.2 | 90.0 | 94.5 | 88.3 | 92.9 | 90.6 | 15,060 | 11.7 | 51% | 37 : 63 | 2011 |
| 10 | Yale University | United States of America | 92.1 | 59.2 | 89.7 | 91.5 | - | 89.5 | 11,751 | 4.4 | 20% | 50 : 50 | 2011 |
knitr::kable(head(schoolCountry,10),caption="School & country information (first 10 rows)")
| school_name | country |
|---|---|
| Harvard University | United States of America |
| California Institute of Technology | United States of America |
| Massachusetts Institute of Technology | United States of America |
| Stanford University | United States of America |
| Princeton University | United States of America |
| University of Cambridge | United Kingdom |
| University of Oxford | United Kingdom |
| University of California, Berkeley | United States of America |
| Imperial College London | United Kingdom |
| Yale University | United States of America |
glimpse(cwurData)
## Rows: 2,200
## Columns: 14
## $ world_rank <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15~
## $ institution <chr> "Harvard University", "Massachusetts Institute of~
## $ country <chr> "USA", "USA", "USA", "United Kingdom", "USA", "US~
## $ national_rank <int> 1, 2, 3, 1, 4, 5, 2, 6, 7, 8, 9, 10, 11, 1, 12, 1~
## $ quality_of_education <int> 7, 9, 17, 10, 2, 8, 13, 14, 23, 16, 15, 21, 31, 3~
## $ alumni_employment <int> 9, 17, 11, 24, 29, 14, 28, 31, 21, 52, 26, 42, 16~
## $ quality_of_faculty <int> 1, 3, 5, 4, 7, 2, 9, 12, 10, 6, 8, 14, 24, 31, 20~
## $ publications <int> 1, 12, 4, 16, 37, 53, 15, 14, 13, 6, 34, 22, 9, 8~
## $ influence <int> 1, 4, 2, 16, 22, 33, 13, 6, 12, 5, 20, 21, 10, 19~
## $ citations <int> 1, 4, 2, 11, 22, 26, 19, 15, 14, 3, 28, 16, 8, 23~
## $ broad_impact <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ patents <int> 5, 1, 15, 50, 18, 101, 26, 66, 5, 16, 101, 10, 9,~
## $ score <dbl> 100.00, 91.67, 89.50, 86.17, 85.21, 82.50, 82.34,~
## $ year <int> 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2~
cwurData %>% group_by(year) %>%
select(year,institution,world_rank) %>% top_n(-5, wt = world_rank) -> cwurTop5
plot_ly(cwurTop5, x = ~year) %>%
add_trace(y = cwurTop5$world_rank, name = cwurTop5$institution, showlegend=TRUE, type = 'scatter', mode = 'lines+markers', color= cwurTop5$institution) %>%
layout(title="World Ranked Universities by CWUR (2012-2015)",
xaxis = list(showticklabels = TRUE, tickangle = 0, tickfont = list(size = 8)),
yaxis = list(title = "World rank"),
hovermode = 'compare')
cwurPlotYear <- function(nYear) {
cwurData %>% filter(year==nYear) %>% top_n(10,-world_rank) %>%
ggplot(aes(x=reorder(institution,-world_rank), y=world_rank)) + geom_bar(stat="identity", aes(fill=reorder(institution,-world_rank)), colour="black") +
theme_bw() + coord_flip() + scale_fill_manual(values=c(rep("lightgreen",7), "#CD7F32", "grey", "gold")) + guides(fill=FALSE) +
labs(x="Institution", y="World Rank",
title=paste("Rank in ",nYear), subtitle="(smaller value is better)")
}
cwurPlotYear(2012) -> d1
cwurPlotYear(2013) -> d2
cwurPlotYear(2014) -> d3
cwurPlotYear(2015) -> d4
grid.arrange(d1,d2,d3,d4, ncol=2)
cwurData %>% group_by(country) %>% summarise(n = length(publications)) %>% top_n(10,n) %>% ungroup() -> c
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=publications, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Rank by publication",
title="Rank by publication", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d1
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=citations, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Rank by citations",
title="Rank by citations", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d2
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=patents, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Rank by patents",
title="Rank by patents", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d3
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=quality_of_education, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Rank by quality of education",
title="Rank by quality of education", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d4
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=alumni_employment, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Rank by alumni employment",
title="Rank by alumni employment", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d5
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=quality_of_faculty, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Rank by quality of faculty",
title="Rank by quality of faculty", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d6
grid.arrange(d1,d2,d3,d4,d5,d6, ncol=2)
cwurData %>% group_by(country,year) %>%
summarise(nr = length(world_rank), minw=min(world_rank), maxw=max(world_rank), avgw=round(mean(world_rank),0)) %>%
select(country, year, nr, minw, maxw, avgw) %>% ungroup() -> ccwur
# light grey boundaries
l <- list(color = toRGB("grey"), width = 0.5)
ccwur$hover <- with(ccwur,
paste("Country: ", country, '<br>',
"Year: ",year, "<br>",
"Universities in top: ", nr, "<br>",
"Min rank in top: ", minw, "<br>",
"Max rank in top: ", maxw, "<br>",
"Mean rank in top: ", avgw,"<br>"
))
g <- list(
showframe = TRUE,
showcoastlines = TRUE,
projection = list(type = 'orthogonal')
)
plot_geo(ccwur, locationmode = 'country names') %>%
add_trace(
z = ~nr, color = ~nr, colors = 'Spectral', frame = ~year,
text = ~hover, locations=~country, marker = list(line = l)
) %>%
colorbar(title = 'Number of\nuniversities in top', tickprefix = '') %>%
layout(
title = with(ccwur, paste('Number of universities in top<br>Source:<a href="http://cwur.org/">Council of World University Ranking</a>')),
geo = g
)
shanghaiDataCld = shanghaiData
shanghaiDataCld$t_score =
0.1 * shanghaiDataCld$alumni + 0.2 * shanghaiDataCld$award + 0.2 * shanghaiDataCld$hici +
0.2 * shanghaiDataCld$ns + 0.2 * shanghaiDataCld$pub + 0.1 * shanghaiDataCld$pcp
shanghaiDataCld$total_score[is.na(shanghaiDataCld$total_score)] = shanghaiDataCld$t_score[is.na(shanghaiDataCld$total_score)]
shanghaiDataCld = shanghaiDataCld[complete.cases(shanghaiDataCld),]
#Fix the duplicate name for University of California-Berkeley
shanghaiDataCld$university_name[shanghaiDataCld$university_name=="University of California-Berkeley"] <- "University of California, Berkeley"
shanghaiDataCld %>% group_by(year) %>%
top_n(10, wt = total_score) %>% select(year,university_name,total_score,alumni, award, hici, ns, pub, pcp) %>% ungroup() -> top10univ
#draw with plotly
plot_ly(top10univ, x = ~year) %>%
add_trace(y = top10univ$total_score, name = top10univ$university_name, showlegend=TRUE, type = 'scatter', mode = 'lines+markers', color= top10univ$university_name) %>%
layout(title="Shanghai (ARWU) World Ranks (2005-2015)<br>Best ranked universities based on total score", legend = list(orientation = 'h'),
xaxis = list(showticklabels = TRUE, tickangle = 0, tickfont = list(size = 8)),
yaxis = list(title = "Total score"),
hovermode = 'compare')
top10SpiderWebYear <- function(nYear) {
top10univ %>% filter(year==nYear) %>% ungroup() -> top10u
top10 <- as.data.frame(cbind(top10u[,c(3,4,5,6,7,8,9)]))
colnames(top10) <- c("Total Score", "Alumni with Nobel", "Awarded Nobel", "Highly Cited",
"Nature&Science", "Publications", "PCAP")
rownames(top10) <- top10u$university_name
rmin <- apply(top10,2,min); rmax <- apply(top10,2,max)
rmax <- 100
rmin <- 0
colors_border=c( "tomato", "blue", "gold", "green", "magenta",
"yellow", "grey", "lightblue", "brown", "red", "lightgreen", "cyan" )
par(mfrow=c(4,3))
par(mar=c(1,1,5,1))
for(i in 1:nrow(top10)){
colorValue<-(col2rgb(as.character(colors_border[i]))%>% as.integer())/255
radarchart(rbind(rmax,rmin,top10[i,]),
axistype=2 ,
pcol=rgb(colorValue[1],colorValue[2],colorValue[3], alpha = 1),
pfcol=rgb(colorValue[1],colorValue[2],colorValue[3], alpha = 0.5),
plwd=1 , plty=1,cglcol="grey", cglty=1, axislabcol="grey", cglwd=0.5,vlcex=0.7,
title=rownames(top10[i,]))
}
title(paste0('\nShanghai World University Rankings top 10 (',nYear,')'),outer=TRUE,col.main='black',cex.main=1.5)
}
top10SpiderWebYear(2005)
top10SpiderWebYear(2006)
top10SpiderWebYear(2007)
top10SpiderWebYear(2008)
top10SpiderWebYear(2009)
top10SpiderWebYear(2010)
top10SpiderWebYear(2011)
top10SpiderWebYear(2012)
top10SpiderWebYear(2013)
top10SpiderWebYear(2014)
top10SpiderWebYear(2015)
merge(shanghaiDataCld,schoolCountry, by.x="university_name", by.y="school_name") -> scData
scData %>% group_by(country) %>% summarise(n = length(alumni)) %>% top_n(10,n) %>% ungroup() -> cs
scData %>% filter(country %in% cs$country) %>%
ggplot(aes(x=country, y=alumni, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Alumni with Nobel (score)",
title="Alumni with Nobel (score)", subtitle="Grouped by country") -> d1
scData %>% filter(country %in% cs$country) %>%
ggplot(aes(x=country, y=award, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Nobel awards score",
title="Nobel awards score", subtitle="Grouped by country") -> d2
scData %>% filter(country %in% cs$country) %>%
ggplot(aes(x=country, y=hici, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Highly cited score",
title="Highly cited score", subtitle="Grouped by country") -> d3
scData %>% filter(country %in% cs$country) %>%
ggplot(aes(x=country, y=ns, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Nature & Science publications score",
title="Nature & Science publications score", subtitle="Grouped by country") -> d4
scData %>% filter(country %in% cs$country) %>%
ggplot(aes(x=country, y=pub, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Publications score",
title="Publications score", subtitle="Grouped by country") -> d5
scData %>% filter(country %in% cs$country) %>%
ggplot(aes(x=country, y=pcp, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Per capita performance score",
title="Per capita performance score", subtitle="Grouped by country") -> d6
grid.arrange(d1,d2,d3, d4, d5, d6, ncol=2)
scData %>% group_by(country, year) %>%
summarise(nr = length(total_score), minw=min(total_score), maxw=max(total_score), avgw=round(mean(total_score),0)) %>%
select(country, year, nr, minw, maxw, avgw) %>% ungroup() -> swur
# light grey boundaries
l <- list(color = toRGB("grey"), width = 0.5)
swur$hover <- with(swur,
paste("Country: ", country, '<br>',
"Year: ",year, "<br>",
"Universities: ", nr, "<br>",
"Min total score: ", minw, "<br>",
"Max total score: ", maxw, "<br>",
"Mean total score: ", avgw,"<br>"
))
# specify map projection/options
g <- list(
showframe = TRUE,
showcoastlines = TRUE,
projection = list(type = 'Mercator')
)
plot_geo(swur, locationmode = 'country names') %>%
add_trace(
z = ~nr, color = ~nr, colors = 'Spectral', frame = ~year,
text = ~hover, locations=~country, marker = list(line = l)
) %>%
colorbar(title = 'Number of\nuniversities', tickprefix = '') %>%
layout(
title = with(swur, paste('Number of universities<br>Source:<a href="http://www.shanghairanking.com">Shanghai Academic World University Rankings</a>')),
geo = g
)
#replace first the missing values (`-`) with NA
timesData$teaching[timesData$teaching=='-'] <- NA
timesData$international[timesData$international=='-'] <- NA
timesData$research[timesData$research=='-'] <- NA
timesData$citations[timesData$citations=='-'] <- NA
timesData$income[timesData$income=='-'] <- NA
timesData$total_score[timesData$total_score=='-'] <- NA
#replace factors with numeric
timesData$teaching <- as.numeric(as.character(timesData$teaching))
timesData$international <- as.numeric(as.character(timesData$international))
timesData$research <- as.numeric(as.character(timesData$research))
timesData$citations <- as.numeric(as.character(timesData$citations))
timesData$income <- as.numeric(as.character(timesData$income))
timesData$total_score <- as.numeric(as.character(timesData$total_score))
# replace NAs with 0
timesData$income[is.na(timesData$income)] <- 0
timesData$international[is.na(timesData$international)] <- 0
#calculate the total score
timesData$t_score =
0.3 * as.numeric(as.character(timesData$teaching)) +
0.075 * as.numeric(as.character(timesData$international)) +
0.3 * as.numeric(as.character(timesData$research)) +
0.3 * as.numeric(as.character(timesData$citations)) +
0.025 * as.numeric(as.character(timesData$income))
#replace the total_score where missing with the calculated value
timesData$total_score[is.na(timesData$total_score)] <- timesData$t_score[is.na(timesData$total_score)]
timesData$wr = as.numeric(as.character(timesData$world_rank))
## Warning: в результате преобразования созданы NA
thePlotYear <- function(nYear) {
timesData %>% filter(year==nYear) %>% top_n(10,-wr) %>%
ggplot(aes(x=reorder(university_name,-wr), y=wr)) + geom_bar(stat="identity", aes(fill=reorder(university_name,-wr)), colour="black") +
theme_bw() + coord_flip() + scale_fill_manual(values=c(rep("lightgreen",7), "#CD7F32", "grey", "gold")) + guides(fill=FALSE) +
labs(x="University name", y="World Rank",
title=paste("Rank in ",nYear), subtitle="(smaller value is better)")
}
thePlotYear(2011) -> d1
thePlotYear(2012) -> d2
thePlotYear(2013) -> d3
thePlotYear(2014) -> d4
thePlotYear(2015) -> d5
thePlotYear(2016) -> d6
grid.arrange(d1,d2,d3,d4,d5,d6, ncol=2)
timesData %>% group_by(year) %>%
top_n(10, wt = total_score) %>%
select(year,university_name,total_score,teaching, international, research, citations, income) %>% ungroup() -> top10univ
theTop10SpiderWebYear <- function(nYear) {
top10univ %>% filter(year==nYear) %>% ungroup() -> top10u
top10 <- as.data.frame(cbind(top10u[,c(3,4,5,6,7,8)]))
colnames(top10) <- c("Total Score", "Teaching", "International Outlook", "Research",
"Citations","Industry Income")
rownames(top10) <- top10u$university_name
rmin <- apply(top10,2,min); rmax <- apply(top10,2,max)
rmax <- 100
rmin <- 0
colors_border=c( "tomato", "blue", "gold", "green", "magenta",
"yellow", "grey", "lightblue", "brown", "red", "lightgreen", "cyan" )
par(mfrow=c(4,3))
par(mar=c(1,1,5,1))
for(i in 1:nrow(top10)){
colorValue<-(col2rgb(as.character(colors_border[i]))%>% as.integer())/255
radarchart(rbind(rmax,rmin,top10[i,]),
axistype=2 ,
pcol=rgb(colorValue[1],colorValue[2],colorValue[3], alpha = 1),
pfcol=rgb(colorValue[1],colorValue[2],colorValue[3], alpha = 0.5),
plwd=1 , plty=1,cglcol="grey", cglty=1, axislabcol="grey", cglwd=0.5,vlcex=0.7,
title=rownames(top10[i,]))
}
title(paste0('\nTimes Higher Education World University Rankings top 10 (',nYear,')'),outer=TRUE,col.main='black',cex.main=1.5)
}
theTop10SpiderWebYear(2011)
theTop10SpiderWebYear(2012)
theTop10SpiderWebYear(2013)
theTop10SpiderWebYear(2014)
theTop10SpiderWebYear(2015)
theTop10SpiderWebYear(2016)
timesData %>% group_by(country) %>% summarise(n = length(teaching)) %>% top_n(10,n) %>% ungroup() -> ct
timesData %>% filter(country %in% ct$country) %>%
ggplot(aes(x=country, y=teaching, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Teaching score",
title="Teaching score", subtitle="Grouped by country") -> d1
timesData %>% filter(country %in% ct$country) %>%
ggplot(aes(x=country, y=international, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="International outlook score",
title="International outlook score", subtitle="Grouped by country") -> d2
timesData %>% filter(country %in% ct$country) %>%
ggplot(aes(x=country, y=research, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Research score",
title="Research score", subtitle="Grouped by country") -> d3
timesData %>% filter(country %in% ct$country) %>%
ggplot(aes(x=country, y=citations, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Citations score",
title="Citations score", subtitle="Grouped by country") -> d4
timesData %>% filter(country %in% ct$country) %>%
ggplot(aes(x=country, y=income, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Industry income score",
title="Industry income score", subtitle="Grouped by country") -> d5
timesData %>% filter(country %in% ct$country) %>%
ggplot(aes(x=country, y=total_score, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Total score",
title="Total score", subtitle="Grouped by country") -> d6
grid.arrange(d1,d2,d3,d4, d5, d6, ncol=2)
timesData$total_score = as.numeric(as.character(timesData$total_score))
#replace with 0 the missing total_score values - this will affect the aggregated values
timesData %>% group_by(country,year) %>%
summarise(nr = length(total_score), minw=min(total_score), maxw=max(total_score), avgw=round(mean(total_score),0)) %>%
select(country, year, nr, minw, maxw, avgw) %>% ungroup() -> ther
# light grey boundaries
l <- list(color = toRGB("grey"), width = 0.5)
ther$hover <- with(ther,
paste("Country: ", country, '<br>',
"Year: ",year, "<br>",
"Universities: ", nr, "<br>",
"Min total score: ", minw, "<br>",
"Max total score: ", maxw, "<br>",
"Mean total score: ", avgw,"<br>"
))
# specify map projection/options
g <- list(
showframe = TRUE,
showcoastlines = TRUE,
projection = list(type = 'Mercator')
)
plot_geo(ther, locationmode = 'country names') %>%
add_trace(
z = ~nr, color = ~nr, colors = 'Spectral', frame = ~year,
text = ~hover, locations=~country, marker = list(line = l)
) %>%
colorbar(title = 'Number of\nuniversities', tickprefix = '') %>%
layout(
title = with(ther, paste('Number of universities<br>Source:<a href="https://www.timeshighereducation.com/world-university-rankings">Times Higher Education World University Ranking</a>')),
geo = g
)